# directory <- "X:/mchung/christina_yek_02/pipeline_output/"
directory <- "X:/mchung/sasha_mushegian_12/pipeline_output/"
print(directory)
## [1] "X:/mchung/sasha_mushegian_12/pipeline_output/"
library(bayestestR)
library(ComplexHeatmap)
library(DT)
library(ggplot2)
library(matrixStats)
library(randomcoloR)
library(reshape2)
library(plotly)
sessionInfo()
## R version 4.5.0 (2025-04-11 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] plotly_4.10.4 reshape2_1.4.4 randomcoloR_1.1.0.1
## [4] matrixStats_1.5.0 ggplot2_3.5.2 DT_0.33
## [7] ComplexHeatmap_2.24.0 bayestestR_0.16.0
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 circlize_0.4.16 shape_1.4.6.1
## [4] rjson_0.2.23 xfun_0.52 bslib_0.9.0
## [7] htmlwidgets_1.6.4 GlobalOptions_0.1.2 insight_1.3.0
## [10] vctrs_0.6.5 tools_4.5.0 generics_0.1.4
## [13] stats4_4.5.0 curl_6.2.2 parallel_4.5.0
## [16] tibble_3.2.1 cluster_2.1.8.1 pkgconfig_2.0.3
## [19] data.table_1.17.2 RColorBrewer_1.1-3 S4Vectors_0.46.0
## [22] lifecycle_1.0.4 compiler_4.5.0 farver_2.1.2
## [25] stringr_1.5.1 codetools_0.2-20 clue_0.3-66
## [28] htmltools_0.5.8.1 sass_0.4.10 yaml_2.3.10
## [31] lazyeval_0.2.2 tidyr_1.3.1 pillar_1.10.2
## [34] crayon_1.5.3 jquerylib_0.1.4 cachem_1.1.0
## [37] iterators_1.0.14 foreach_1.5.2 tidyselect_1.2.1
## [40] digest_0.6.37 Rtsne_0.17 stringi_1.8.7
## [43] dplyr_1.1.4 purrr_1.0.4 fastmap_1.2.0
## [46] colorspace_2.1-1 cli_3.6.5 magrittr_2.0.3
## [49] withr_3.0.2 scales_1.4.0 rmarkdown_2.29
## [52] httr_1.4.7 png_0.1-8 GetoptLong_1.0.5
## [55] evaluate_1.0.3 knitr_1.50 IRanges_2.42.0
## [58] V8_6.0.3 doParallel_1.0.17 viridisLite_0.4.2
## [61] rlang_1.1.6 Rcpp_1.0.14 glue_1.8.0
## [64] BiocGenerics_0.54.0 rstudioapi_0.17.1 jsonlite_2.0.0
## [67] R6_2.6.1 plyr_1.8.9
bad_taxa <- c("Homo sapiens","synthetic construct")
counts <- list(taxa = read.delim(paste0(directory,"/tables/counts.taxa.bracken.tsv"),
check.names = F))
counts$taxa <- counts$taxa[!(rownames(counts$taxa) %in% bad_taxa),]
counts$module <- read.delim(paste0(directory,"/tables/counts.ko.module.fmap.tsv"),
quote = "",check.names = F)
counts$ortho <- read.delim(paste0(directory,"/tables/counts.ko.ortho.fmap.tsv"),
quote = "",check.names = F)
counts$pathway <- read.delim(paste0(directory,"/tables/counts.ko.pathway.fmap.tsv"),
quote = "",check.names = F)
set.seed(123456)
if(ncol(counts$taxa) >= 300){
subset <- sort(sample(colnames(counts$taxa),300))
}else{
subset <- colnames(counts$taxa)
}
df <- as.data.frame(cbind(colnames(counts$taxa),
colSums(counts$taxa)))
names(df) <- c("sample","counts")
df$counts <- as.numeric(as.character(df$counts))
datatable(df,rownames = F)
fig <- plot_ly(x=colnames(counts$taxa[subset]),y=colSums(counts$taxa[subset]),color=I("blue"),
name="Taxa",
type="bar")
fig <- fig %>% layout(title = "Bracken Taxa Counts",
yaxis = list(title = "counts"))
fig
df <- df[df[,1] %in% subset,]
p <- ggplot()+
geom_density(aes(x=df$counts),fill="blue")+
geom_rug(aes(x=df$counts,label=df$sample),color="blue")+
stat_boxplot(geom = "vline", aes(xintercept = 0))+
labs(x="counts")+
theme_bw()
fig <- ggplotly(p)
fig
relab <- as.matrix(apply(counts$taxa[subset],2,function(x){x/sum(x)*100}))
top_taxa <- rownames(relab[order(-rowMedians(relab)),])[1:50]
set.seed(446)
col <- distinctColorPalette(length(top_taxa))
relab.top_taxa <- as.data.frame(relab[top_taxa,])
relab.top_taxa["Other",] <- 100-colSums(relab.top_taxa)
df <- reshape2::melt(as.matrix(relab.top_taxa))
df[,1] <- factor(df[,1],levels=c(top_taxa,"Other"))
colnames(df) <- c("taxa","sample","relab")
p <- ggplot(df,aes(x=sample,y=relab,fill=taxa))+
geom_bar(stat="identity")+
scale_fill_manual(values=c(rev(col),"grey"))+
scale_y_continuous(expand=c(0,0))+
labs(y="relative abundance",fill="Taxa")+
theme_bw()+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
fig <- ggplotly(p,width=1200,height=800)
fig
kuniq <- list(cov = read.delim(paste0(directory,"/tables/cov.taxa.krakenuniq.tsv"),
check.names = F),
dup = read.delim(paste0(directory,"/tables/dup.taxa.krakenuniq.tsv"),
check.names = F),
kmers = read.delim(paste0(directory,"/tables/kmers.taxa.krakenuniq.tsv"),
check.names = F))
kuniq <- lapply(kuniq,function(x){return(x[colnames(counts$taxa)])})
df <- reshape2::melt(as.matrix(relab[top_taxa,subset]))
df[,1] <- factor(df[,1],levels=rev(top_taxa))
colnames(df) <- c("taxa","sample","relab")
p <- ggplot(df,aes(y=relab,x=taxa,fill=taxa))+
geom_boxplot()+
scale_fill_manual(values=col)+
labs(y="relative abundance")+
theme_bw()+
theme(axis.title.y = element_blank(),
legend.position='none')+
coord_flip()
fig <- ggplotly(p,width=800,height=800)
fig
df <- reshape2::melt(as.matrix(kuniq$cov[top_taxa,subset]))
df[,1] <- factor(df[,1],levels=rev(top_taxa))
colnames(df) <- c("taxa","sample","cov")
p <- ggplot(df,aes(y=cov,x=taxa,fill=taxa))+
geom_boxplot()+
scale_fill_manual(values=col)+
labs(y="relative abundance")+
theme_bw()+
theme(axis.title.y = element_blank(),
legend.position='none')+
coord_flip()
fig <- ggplotly(p,width=800,height=800)
fig
df <- reshape2::melt(as.matrix(kuniq$dup[top_taxa,subset]))
df[,1] <- factor(df[,1],levels=rev(top_taxa))
colnames(df) <- c("taxa","sample","dup")
p <- ggplot(df,aes(y=dup,x=taxa,fill=taxa))+
geom_boxplot()+
scale_fill_manual(values=col)+
labs(y="relative abundance")+
theme_bw()+
theme(axis.title.y = element_blank(),
legend.position='none')+
coord_flip()
fig <- ggplotly(p,width=800,height=800)
fig
df <- reshape2::melt(as.matrix(kuniq$kmers[top_taxa,subset]))
df[,1] <- factor(df[,1],levels=rev(top_taxa))
colnames(df) <- c("taxa","sample","kmers")
p <- ggplot(df,aes(y=kmers,x=taxa,fill=taxa))+
geom_boxplot()+
scale_fill_manual(values=col)+
labs(y="relative abundance")+
theme_bw()+
theme(axis.title.y = element_blank(),
legend.position='none')+
coord_flip()
fig <- ggplotly(p,width=800,height=800)
fig
fig <- plot_ly(x=colnames(counts$pathway[subset]),y=colSums(counts$pathway[subset]),color=I("red"),
name="Pathway KOs",
type="bar")
fig <- fig %>% layout(title = "FMAP Pathway KO Counts",
yaxis = list(title = "counts"))
fig
df <- as.data.frame(cbind(colnames(counts$pathway[subset]),
colSums(counts$pathway[subset])))
names(df) <- c("sample","counts")
df$counts <- as.numeric(as.character(df$counts))
p <- ggplot()+
geom_density(aes(x=df$counts),fill="red")+
geom_rug(aes(x=df$counts,label=df$sample),color="red")+
stat_boxplot(geom = "vline", aes(xintercept = 0))+
labs(x="counts")+
theme_bw()
fig <- ggplotly(p)
fig
fig <- plot_ly(x=colnames(counts$ortho[subset]),y=colSums(counts$ortho[subset]),color=I("green"),
name="Ortho KOs",
type="bar")
fig <- fig %>% layout(title = "FMAP Ortho KO Counts",
yaxis = list(title = "counts"))
fig
df <- as.data.frame(cbind(colnames(counts$ortho[subset]),
colSums(counts$ortho[subset])))
names(df) <- c("sample","counts")
df$counts <- as.numeric(as.character(df$counts))
p <- ggplot()+
geom_density(aes(x=df$counts),fill="green")+
geom_rug(aes(x=df$counts,label=df$sample),color="green")+
stat_boxplot(geom = "vline", aes(xintercept = 0))+
labs(x="counts")+
theme_bw()
fig <- ggplotly(p)
fig
fig <- plot_ly(x=colnames(counts$module[subset]),y=colSums(counts$module[subset]),color=I("orange"),
name="Module KOs",
type="bar")
fig <- fig %>% layout(title = "FMAP Module KO Counts",
yaxis = list(title = "counts"))
fig
df <- as.data.frame(cbind(colnames(counts$module[subset]),
colSums(counts$module[subset])))
names(df) <- c("sample","counts")
df$counts <- as.numeric(as.character(df$counts))
p <- ggplot()+
geom_density(aes(x=df$counts),fill="orange")+
geom_rug(aes(x=df$counts,label=df$sample),color="orange")+
stat_boxplot(geom = "vline", aes(xintercept = 0))+
labs(x="counts")+
theme_bw()
fig <- ggplotly(p)
fig
df <- counts$pathway[subset]
relab <- apply(df,2,function(x){x/sum(x)*100})
top_kos <- rownames(relab)[order(-rowMedians(relab))][1:50]
df <- reshape2::melt(as.matrix(df[top_kos,]))
df[,1] <- factor(df[,1],levels=rev(top_kos))
colnames(df) <- c("kos","sample","kmers")
p <- ggplot(df,aes(y=kmers,x=kos))+
geom_boxplot(fill="red")+
labs(y="relative abundance")+
theme_bw()+
theme(axis.title.y = element_blank(),
legend.position='none')+
coord_flip()
fig <- ggplotly(p,width=800,height=800)
fig
df <- counts$ortho[subset]
relab <- apply(df,2,function(x){x/sum(x)*100})
top_kos <- rownames(relab)[order(-rowMedians(relab))][1:50]
df <- reshape2::melt(as.matrix(df[top_kos,]))
df[,1] <- factor(df[,1],levels=rev(top_kos))
colnames(df) <- c("kos","sample","kmers")
p <- ggplot(df,aes(y=kmers,x=kos))+
geom_boxplot(fill="green")+
labs(y="relative abundance")+
theme_bw()+
theme(axis.title.y = element_blank(),
legend.position='none')+
coord_flip()
fig <- ggplotly(p,width=800,height=800)
fig
df <- counts$module[subset]
relab <- apply(df,2,function(x){x/sum(x)*100})
top_kos <- rownames(relab)[order(-rowMedians(relab))][1:50]
df <- reshape2::melt(as.matrix(df[top_kos,]))
df[,1] <- factor(df[,1],levels=rev(top_kos))
colnames(df) <- c("kos","sample","kmers")
p <- ggplot(df,aes(y=kmers,x=kos))+
geom_boxplot(fill="orange")+
labs(y="relative abundance")+
theme_bw()+
theme(axis.title.y = element_blank(),
legend.position='none')+
coord_flip()
fig <- ggplotly(p,width=800,height=800)
fig